import time
time_start_notebook = time.time()
%%capture
import sys
ENV_COLAB = 'google.colab' in sys.modules
if ENV_COLAB:
# usual imports
!pip install watermark
!pip install scikit-plot
!pip install --upgrade git+https://github.com/stanfordmlgroup/ngboost.git
# HPO
!git clone https://github.com/thuijskens/scikit-hyperband.git
sys.path.append('scikit-hyperband/hyperband')
print('Environment: Google Colab')
sys.path.append("/Users/poudel/Dropbox/a00_Resources/hyperband")
try:
from search import HyperbandSearchCV
print('File found: search.py')
except:
print('File not found: search.py')
try:
from hyperband_search import HyperbandSearchCV
print('File found: hyperband_search.py')
except:
print('File not found: hyperband_search.py')
File not found: search.py File found: hyperband_search.py
import numpy as np
import pandas as pd
import seaborn as sns
import os,sys,time
import matplotlib.pyplot as plt
import joblib
from tqdm import tqdm_notebook as tqdm
import plotly_express as px
# modelling
from sklearn.preprocessing import OneHotEncoder
import imblearn
from imblearn.over_sampling import SMOTE
import sklearn.metrics as skmetrics
# pipeline
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import FunctionTransformer
# boosting
import ngboost as ngb
# settings
sns.set()
SEED = 100
pd.set_option('max_columns',100)
pd.set_option('max_colwidth',200)
pd.set_option('plotting.backend','matplotlib') # matplotlib, bokeh, altair, plotly
%matplotlib inline
%load_ext watermark
%watermark -iv
json 2.0.9 numpy 1.19.4 plotly_express 0.4.1 seaborn 0.11.0 pandas 1.1.4 joblib 0.17.0 imblearn 0.7.0 autopep8 1.5.2
def show_methods(obj, ncols=4, contains=None):
    """Return a DataFrame listing the public attributes of *obj* in a grid.

    Parameters
    ----------
    obj : object
        Any object whose ``dir()`` listing should be tabulated.
    ncols : int
        Number of columns in the returned table.
    contains : str or None
        If given, keep only attribute names containing this substring.

    Returns
    -------
    pd.DataFrame
        Attribute names arranged column-wise; empty cells padded with ''.
    """
    names = [name for name in dir(obj) if not name.startswith('_')]
    if contains is not None:
        names = [name for name in names if contains in name]
    # Split the flat list into ncols roughly-equal chunks (one per column),
    # then transpose so the names read down each column.
    table = pd.DataFrame(np.array_split(names, ncols)).T.fillna('')
    return table
def model_eval_bin(model_name,ytest,ypreds,yprobs2d,show_plots=True):
    """Evaluate a fitted binary classifier and persist its metric row.

    Prints sklearn's classification report and confusion matrix, displays a
    one-row styled DataFrame (Accuracy/Precision/Recall/F1/AUC) indexed by
    *model_name*, writes that row to ``model_<model_name>.csv`` and,
    optionally, draws precision-recall / ROC / confusion-matrix plots.

    Parameters
    ----------
    model_name : str
        Label used as the DataFrame index and in the output file name.
    ytest : array-like of 0/1
        Ground-truth labels.
    ypreds : array-like of 0/1
        Hard class predictions.
    yprobs2d : array-like, shape (n, 2)
        Per-class predicted probabilities, as returned by predict_proba
        (used only for the PR and ROC plots).
    show_plots : bool
        Draw the three scikit-plot figures when True.

    Notes
    -----
    Relies on module-level globals: ``pd``, ``ENV_COLAB`` and the notebook
    builtin ``display``.  Intended for notebook use.
    """
    # local imports so the function is self-contained in the notebook
    import sklearn.metrics as skmetrics
    import scikitplot.metrics as skpmetrics
    import os
    acc = skmetrics.accuracy_score(ytest,ypreds)
    precision = skmetrics.precision_score(ytest,ypreds)
    recall = skmetrics.recall_score(ytest,ypreds)
    f1 = skmetrics.f1_score(ytest,ypreds)
    # NOTE(review): AUC is computed from hard labels, not probabilities —
    # roc_auc_score(ytest, yprobs2d[:, 1]) would give the usual ranking AUC;
    # confirm this is intentional.
    auc = skmetrics.roc_auc_score(ytest,ypreds)
    print(skmetrics.classification_report(ytest,ypreds))
    print(skmetrics.confusion_matrix(ytest,ypreds))
    df_res = pd.DataFrame({'Accuracy':[acc],
    'Precision': [precision],
    'Recall': [recall],
    'F1-score': [f1],
    'AUC': [auc]},index=[model_name])
    display(df_res.style.format("{:.4f}"))
    # NOTE(review): '../outputs' is created even on Colab, where the CSV is
    # then written to '.' instead — the mkdir looks unnecessary there.
    if not os.path.isdir('../outputs'):
        os.makedirs('../outputs')
    o = '.' if ENV_COLAB else '../outputs/'
    df_res.to_csv(o+f'model_{model_name}.csv',index=True)
    if show_plots:
        skpmetrics.plot_precision_recall(ytest,yprobs2d) # more focus on minority
        skpmetrics.plot_roc_curve(ytest,yprobs2d) # equal focus on both groups
        skpmetrics.plot_confusion_matrix(ytest,ypreds)
def get_profit(y_true, y_pred):
    """Business profit of binary churn predictions, in dollars.

    Each true positive earns $400, each false negative costs $200 and each
    false positive costs $100; true negatives are free.

    Parameters
    ----------
    y_true, y_pred : array-like of 0/1
        Ground-truth and predicted labels.

    Returns
    -------
    int
        Total profit: ``400*tp - 200*fn - 100*fp``.
    """
    yt = np.asarray(y_true).ravel()
    yp = np.asarray(y_pred).ravel()
    # Count the confusion-matrix cells directly.  The previous
    # ``skmetrics.confusion_matrix(...).ravel()`` 4-way unpacking raised a
    # ValueError whenever only one class was present (sklearn then returns a
    # 1x1 matrix); explicit counts handle that edge case and give identical
    # results for ordinary binary input.
    tp = int(np.sum((yt == 1) & (yp == 1)))
    fn = int(np.sum((yt == 1) & (yp == 0)))
    fp = int(np.sum((yt == 0) & (yp == 1)))
    profit = 400*tp - 200*fn - 100*fp
    return profit
scoring = skmetrics.make_scorer(get_profit, greater_is_better=True)
path_data_train = '../data/raw/train.csv'
path_data_test = '../data/raw/test.csv'
if ENV_COLAB:
path_data_train = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/train.csv'
path_data_test = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/test.csv'
df_train = pd.read_csv(path_data_train)
df_test = pd.read_csv(path_data_test)
print(df_train.shape)
print(df_test.shape)
df_train.head(2).append(df_train.tail(2))
(5634, 21) (1409, 21)
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1621-YNCJH | Female | 0 | Yes | No | 36 | Yes | Yes | Fiber optic | Yes | Yes | Yes | Yes | No | Yes | Two year | Yes | Credit card (automatic) | 106.05 | 3834.4 | No |
| 1 | 7143-BQIBA | Male | 0 | No | No | 10 | Yes | No | DSL | Yes | No | No | Yes | Yes | No | Month-to-month | No | Bank transfer (automatic) | 62.25 | 612.95 | No |
| 5632 | 0862-PRCBS | Female | 0 | Yes | Yes | 68 | Yes | Yes | Fiber optic | No | Yes | No | Yes | Yes | Yes | Two year | Yes | Credit card (automatic) | 103.75 | 7039.45 | No |
| 5633 | 4656-CAURT | Male | 0 | No | No | 69 | Yes | Yes | No | No internet service | No internet service | No internet service | No internet service | No internet service | No internet service | Two year | No | Bank transfer (automatic) | 23.95 | 1713.1 | No |
target_name = 'Churn'
px.histogram(df_train, x=target_name,height=300,width=300)
px.histogram(df_train, x='gender', color=target_name,height=300,width=300)
df_train['TotalCharges'] = pd.to_numeric(df_train['TotalCharges'],errors='coerce').fillna(0)
df_test['TotalCharges'] = pd.to_numeric(df_test['TotalCharges'],errors='coerce').fillna(0)
df_train['SeniorCitizen'] = df_train['SeniorCitizen'].map({0:'No',1:'Yes'})
df_test['SeniorCitizen'] = df_test['SeniorCitizen'].map({0:'No',1:'Yes'})
df_Xtrain = df_train.drop(target_name,axis=1)
df_Xtest = df_test.drop(target_name,axis=1)
ser_ytrain = df_train[target_name].map({'No':0,'Yes':1})
ser_ytest = df_test[target_name].map({'No':0,'Yes':1})
ytrain = np.array(ser_ytrain).flatten()
ytest = np.array(ser_ytest).flatten()
index_name = 'customerID'
ser_train_ids = df_Xtrain.pop(index_name)
ser_test_ids = df_Xtest.pop(index_name)
df_Xtrain.head(2)
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | No | Yes | No | 36 | Yes | Yes | Fiber optic | Yes | Yes | Yes | Yes | No | Yes | Two year | Yes | Credit card (automatic) | 106.05 | 3834.40 |
| 1 | Male | No | No | No | 10 | Yes | No | DSL | Yes | No | No | Yes | Yes | No | Month-to-month | No | Bank transfer (automatic) | 62.25 | 612.95 |
cols_num = list(df_train.select_dtypes('number').columns)
cols_num
['tenure', 'MonthlyCharges', 'TotalCharges']
cols_cat = list(df_train.select_dtypes('object').columns)
# gender is no good predictor as seen in EDA
cols_exclude = ['customerID','gender','TotalCharges'] + [target_name]
cols_cat = [ i for i in cols_cat if i not in cols_exclude ] + ['SeniorCitizen']
print(cols_cat)
['SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'SeniorCitizen']
cols_num = ['TotalCharges','tenure', 'MonthlyCharges']
cols_num_old = cols_num
cols_cat_old = cols_cat
def combine_two_features(dfx, A, B):
    """Return a copy of *dfx* with one string-interaction column per pair.

    For each pair ``(a, b)`` taken position-wise from *A* and *B*, a new
    column named ``"<a>_<b>"`` is added holding ``dfx[a] + '_' + dfx[b]``.

    Parameters
    ----------
    dfx : pd.DataFrame
        Input frame (not modified; a copy is returned).
    A, B : list of str
        Equal-length lists of column names to combine.
    """
    out = dfx.copy()
    assert len(A) == len(B)
    for left, right in zip(A, B):
        out[left + '_' + right] = out[left] + '_' + out[right]
    return out
combineA = ['Partner']
combineB = ['Dependents']
combineA = combineA + ['SeniorCitizen']*5
combineB = combineB + ['Dependents','Partner','Contract',
'TechSupport','PaymentMethod']
cols_cat_new = [f'{a}_{b}' for a,b in zip(combineA,combineB)]
cols_cat = list(set(cols_cat + cols_cat_new))
print(cols_cat_new)
# print(cols_cat)
df_Xtrain = combine_two_features(df_Xtrain,combineA,combineB)
df_Xtest = combine_two_features(df_Xtest,combineA,combineB)
['Partner_Dependents', 'SeniorCitizen_Dependents', 'SeniorCitizen_Partner', 'SeniorCitizen_Contract', 'SeniorCitizen_TechSupport', 'SeniorCitizen_PaymentMethod']
def create_groupby_features(dfx, cat, num, agg, df_ref=None):
    """Add per-category aggregate features to *dfx*.

    For every combination of categorical column ``c`` in *cat*, numeric
    column ``n`` in *num* and aggregation name ``a`` in *agg*, a column
    ``"{c}_{n}_{a}"`` is added holding the aggregate of ``n`` over the
    row's ``c`` group, computed on *df_ref*.

    Parameters
    ----------
    dfx : pd.DataFrame
        Frame to augment (not modified; a copy is returned).
    cat, num, agg : list of str
        Categorical columns, numeric columns, pandas aggregation names.
    df_ref : pd.DataFrame or None
        Frame on which the group statistics are computed.  Defaults to the
        module-level training frame ``df_train``, so test data reuses the
        training statistics (no leakage from test rows).

    Returns
    -------
    pd.DataFrame
    """
    if df_ref is None:
        df_ref = df_train  # module-level training data, as before
    dfx = dfx.copy()
    for c in cat:
        for n in num:
            for a in agg:
                name = f"{c}_{n}_{a}"
                # BUG FIX: the old ``df_train.groupby(c)[n].transform(a)``
                # returned a series aligned to df_train's index, so rows of
                # a *different* frame (e.g. the test set) received values by
                # index coincidence rather than by category.  Mapping the
                # per-category aggregates through dfx's own category column
                # assigns the correct statistic to every row, and is
                # identical to the old behavior when dfx is df_train itself.
                dfx[name] = dfx[c].map(df_ref.groupby(c)[n].agg(a))
    return dfx
# Using more features gave me worse AUC.
# cols_grpcat = ['Contract','PaymentMethod']
# cols_grpnum = ['TotalCharges','MonthlyCharges']
# cols_grpagg = ['mean', 'max', 'min']
cols_grpcat = ['Contract']
cols_grpnum = ['TotalCharges']
cols_grpagg = ['mean']
cols_num_new = [f'{c}_{n}_{a}'
for c in cols_grpcat
for n in cols_grpnum
for a in cols_grpagg]
cols_num = list(set(cols_num + cols_num_new))
print(cols_num_new)
# print(cols_num)
df_Xtrain = create_groupby_features(df_Xtrain,cols_grpcat, cols_grpnum, cols_grpagg)
df_Xtest = create_groupby_features(df_Xtest,cols_grpcat, cols_grpnum, cols_grpagg)
['Contract_TotalCharges_mean']
df_Xtrain.head(2)
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Partner_Dependents | SeniorCitizen_Dependents | SeniorCitizen_Partner | SeniorCitizen_Contract | SeniorCitizen_TechSupport | SeniorCitizen_PaymentMethod | Contract_TotalCharges_mean | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | No | Yes | No | 36 | Yes | Yes | Fiber optic | Yes | Yes | Yes | Yes | No | Yes | Two year | Yes | Credit card (automatic) | 106.05 | 3834.40 | Yes_No | No_No | No_Yes | No_Two year | No_Yes | No_Credit card (automatic) | 3683.643192 |
| 1 | Male | No | No | No | 10 | Yes | No | DSL | Yes | No | No | Yes | Yes | No | Month-to-month | No | Bank transfer (automatic) | 62.25 | 612.95 | No_No | No_No | No_No | No_Month-to-month | No_Yes | No_Bank transfer (automatic) | 1370.923131 |
cols_drop = ['gender']
df_Xtrain = df_Xtrain.drop(cols_drop,axis=1)
df_Xtest = df_Xtest.drop(cols_drop,axis=1)
all_features = df_Xtrain.columns.tolist()
cols_cat_idx = [all_features.index(i)
for i in cols_cat]
# make sure no nans
df_Xtrain.isna().sum().sum(), df_Xtest.isna().sum().sum()
(0, 0)
df_Xtrain_full = df_Xtrain.copy()
ser_ytrain_full = ser_ytrain.copy()
ytrain_full = np.array(ser_ytrain_full).flatten()
df_Xtrain_full = pd.get_dummies(df_Xtrain_full,columns=cols_cat,drop_first=False)
df_Xtrain = pd.get_dummies(df_Xtrain,columns=cols_cat,drop_first=False)
df_Xtest = pd.get_dummies(df_Xtest,columns=cols_cat,drop_first=False)
df_Xtrain_full.head(2).append(df_Xtrain.head(2)).append(df_Xtest.head(2))
| tenure | MonthlyCharges | TotalCharges | Contract_TotalCharges_mean | OnlineBackup_No | OnlineBackup_No internet service | OnlineBackup_Yes | DeviceProtection_No | DeviceProtection_No internet service | DeviceProtection_Yes | SeniorCitizen_PaymentMethod_No_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_No_Credit card (automatic) | SeniorCitizen_PaymentMethod_No_Electronic check | SeniorCitizen_PaymentMethod_No_Mailed check | SeniorCitizen_PaymentMethod_Yes_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_Yes_Credit card (automatic) | SeniorCitizen_PaymentMethod_Yes_Electronic check | SeniorCitizen_PaymentMethod_Yes_Mailed check | PhoneService_No | PhoneService_Yes | SeniorCitizen_Contract_No_Month-to-month | SeniorCitizen_Contract_No_One year | SeniorCitizen_Contract_No_Two year | SeniorCitizen_Contract_Yes_Month-to-month | SeniorCitizen_Contract_Yes_One year | SeniorCitizen_Contract_Yes_Two year | Contract_Month-to-month | Contract_One year | Contract_Two year | TechSupport_No | TechSupport_No internet service | TechSupport_Yes | SeniorCitizen_Partner_No_No | SeniorCitizen_Partner_No_Yes | SeniorCitizen_Partner_Yes_No | SeniorCitizen_Partner_Yes_Yes | StreamingTV_No | StreamingTV_No internet service | StreamingTV_Yes | Dependents_No | Dependents_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | MultipleLines_No | MultipleLines_No phone service | MultipleLines_Yes | OnlineSecurity_No | OnlineSecurity_No internet service | OnlineSecurity_Yes | SeniorCitizen_No | SeniorCitizen_Yes | Partner_No | Partner_Yes | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Partner_Dependents_No_No | Partner_Dependents_No_Yes | Partner_Dependents_Yes_No | Partner_Dependents_Yes_Yes | SeniorCitizen_TechSupport_No_No | SeniorCitizen_TechSupport_No_No internet service | SeniorCitizen_TechSupport_No_Yes | SeniorCitizen_TechSupport_Yes_No | 
SeniorCitizen_TechSupport_Yes_No internet service | SeniorCitizen_TechSupport_Yes_Yes | SeniorCitizen_Dependents_No_No | SeniorCitizen_Dependents_No_Yes | SeniorCitizen_Dependents_Yes_No | SeniorCitizen_Dependents_Yes_Yes | PaperlessBilling_No | PaperlessBilling_Yes | StreamingMovies_No | StreamingMovies_No internet service | StreamingMovies_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 36 | 106.05 | 3834.40 | 3683.643192 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 1 | 10 | 62.25 | 612.95 | 1370.923131 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| 0 | 36 | 106.05 | 3834.40 | 3683.643192 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 1 | 10 | 62.25 | 612.95 | 1370.923131 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| 0 | 1 | 48.60 | 48.60 | 3683.643192 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
| 1 | 56 | 99.90 | 5706.30 | 1370.923131 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 |
ser_ytrain_full.head(2).append(ser_ytrain.head(2)).append(ser_ytest.head(2))
0 0 1 0 0 0 1 0 0 1 1 0 Name: Churn, dtype: int64
from sklearn.model_selection import train_test_split
df_Xtrain, df_Xvalid, ser_ytrain, ser_yvalid = train_test_split(
df_Xtrain_full, ser_ytrain_full,
test_size=0.2,
random_state=SEED,
stratify=ser_ytrain_full)
ytrain = ser_ytrain.to_numpy().ravel()
yvalid = ser_yvalid.to_numpy().ravel()
print(f"df_train : {df_train.shape}\n")
print(f"df_Xtrain : {df_Xtrain.shape}")
print(f"ser_ytrain : {ser_ytrain.shape}\n")
print(f"df_Xvalid : {df_Xvalid.shape}")
print(f"ser_yvalid : {ser_yvalid.shape}\n")
print(f"df_test : {df_test.shape}")
print(f"ser_ytest : This does not exist.")
df_Xtrain.head(2)
df_train : (5634, 21) df_Xtrain : (4507, 77) ser_ytrain : (4507,) df_Xvalid : (1127, 77) ser_yvalid : (1127,) df_test : (1409, 21) ser_ytest : This does not exist.
| tenure | MonthlyCharges | TotalCharges | Contract_TotalCharges_mean | OnlineBackup_No | OnlineBackup_No internet service | OnlineBackup_Yes | DeviceProtection_No | DeviceProtection_No internet service | DeviceProtection_Yes | SeniorCitizen_PaymentMethod_No_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_No_Credit card (automatic) | SeniorCitizen_PaymentMethod_No_Electronic check | SeniorCitizen_PaymentMethod_No_Mailed check | SeniorCitizen_PaymentMethod_Yes_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_Yes_Credit card (automatic) | SeniorCitizen_PaymentMethod_Yes_Electronic check | SeniorCitizen_PaymentMethod_Yes_Mailed check | PhoneService_No | PhoneService_Yes | SeniorCitizen_Contract_No_Month-to-month | SeniorCitizen_Contract_No_One year | SeniorCitizen_Contract_No_Two year | SeniorCitizen_Contract_Yes_Month-to-month | SeniorCitizen_Contract_Yes_One year | SeniorCitizen_Contract_Yes_Two year | Contract_Month-to-month | Contract_One year | Contract_Two year | TechSupport_No | TechSupport_No internet service | TechSupport_Yes | SeniorCitizen_Partner_No_No | SeniorCitizen_Partner_No_Yes | SeniorCitizen_Partner_Yes_No | SeniorCitizen_Partner_Yes_Yes | StreamingTV_No | StreamingTV_No internet service | StreamingTV_Yes | Dependents_No | Dependents_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | MultipleLines_No | MultipleLines_No phone service | MultipleLines_Yes | OnlineSecurity_No | OnlineSecurity_No internet service | OnlineSecurity_Yes | SeniorCitizen_No | SeniorCitizen_Yes | Partner_No | Partner_Yes | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Partner_Dependents_No_No | Partner_Dependents_No_Yes | Partner_Dependents_Yes_No | Partner_Dependents_Yes_Yes | SeniorCitizen_TechSupport_No_No | SeniorCitizen_TechSupport_No_No internet service | SeniorCitizen_TechSupport_No_Yes | SeniorCitizen_TechSupport_Yes_No | 
SeniorCitizen_TechSupport_Yes_No internet service | SeniorCitizen_TechSupport_Yes_Yes | SeniorCitizen_Dependents_No_No | SeniorCitizen_Dependents_No_Yes | SeniorCitizen_Dependents_Yes_No | SeniorCitizen_Dependents_Yes_Yes | PaperlessBilling_No | PaperlessBilling_Yes | StreamingMovies_No | StreamingMovies_No internet service | StreamingMovies_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4555 | 16 | 19.75 | 294.90 | 1370.923131 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 3379 | 72 | 64.70 | 4746.05 | 3683.643192 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
Dist = distribution, e.g. Bernoulli,
Score = scoring rule, e.g. LogScore,
Base = base learner, e.g. DecisionTreeRegressor()
natural_gradient = True,
n_estimators = 500,
learning_rate = 0.01,
minibatch_frac = 1.0,
col_sample = 1.0,
verbose = True,
verbose_eval = 100,
tol = 0.0001,
random_state = None,
#=================================
NGBClassifier.fit(X,Y,
X_val = None,
Y_val = None,
sample_weight = None,
val_sample_weight = None,
train_loss_monitor = None,
val_loss_monitor = None,
early_stopping_rounds = None,
)
from ngboost import NGBClassifier
# NGBClassifier?
show_methods(ngb)
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | NGBClassifier | NGBoost | helpers | ngboost |
| 1 | NGBRegressor | api | learners | scores |
| 2 | NGBSurvival | distns | manifold |
show_methods(ngb.NGBClassifier)
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | feature_importances_ | get_params | predict | set_params |
| 1 | fit | line_search | predict_proba | staged_pred_dist |
| 2 | fit_base | pred_dist | sample | staged_predict |
| 3 | fit_init_params_to_marginal | pred_param | score | staged_predict_proba |
# NGBClassifier.fit?
df_Xtrain.head()
| tenure | MonthlyCharges | TotalCharges | Contract_TotalCharges_mean | OnlineBackup_No | OnlineBackup_No internet service | OnlineBackup_Yes | DeviceProtection_No | DeviceProtection_No internet service | DeviceProtection_Yes | SeniorCitizen_PaymentMethod_No_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_No_Credit card (automatic) | SeniorCitizen_PaymentMethod_No_Electronic check | SeniorCitizen_PaymentMethod_No_Mailed check | SeniorCitizen_PaymentMethod_Yes_Bank transfer (automatic) | SeniorCitizen_PaymentMethod_Yes_Credit card (automatic) | SeniorCitizen_PaymentMethod_Yes_Electronic check | SeniorCitizen_PaymentMethod_Yes_Mailed check | PhoneService_No | PhoneService_Yes | SeniorCitizen_Contract_No_Month-to-month | SeniorCitizen_Contract_No_One year | SeniorCitizen_Contract_No_Two year | SeniorCitizen_Contract_Yes_Month-to-month | SeniorCitizen_Contract_Yes_One year | SeniorCitizen_Contract_Yes_Two year | Contract_Month-to-month | Contract_One year | Contract_Two year | TechSupport_No | TechSupport_No internet service | TechSupport_Yes | SeniorCitizen_Partner_No_No | SeniorCitizen_Partner_No_Yes | SeniorCitizen_Partner_Yes_No | SeniorCitizen_Partner_Yes_Yes | StreamingTV_No | StreamingTV_No internet service | StreamingTV_Yes | Dependents_No | Dependents_Yes | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | MultipleLines_No | MultipleLines_No phone service | MultipleLines_Yes | OnlineSecurity_No | OnlineSecurity_No internet service | OnlineSecurity_Yes | SeniorCitizen_No | SeniorCitizen_Yes | Partner_No | Partner_Yes | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Partner_Dependents_No_No | Partner_Dependents_No_Yes | Partner_Dependents_Yes_No | Partner_Dependents_Yes_Yes | SeniorCitizen_TechSupport_No_No | SeniorCitizen_TechSupport_No_No internet service | SeniorCitizen_TechSupport_No_Yes | SeniorCitizen_TechSupport_Yes_No | 
SeniorCitizen_TechSupport_Yes_No internet service | SeniorCitizen_TechSupport_Yes_Yes | SeniorCitizen_Dependents_No_No | SeniorCitizen_Dependents_No_Yes | SeniorCitizen_Dependents_Yes_No | SeniorCitizen_Dependents_Yes_Yes | PaperlessBilling_No | PaperlessBilling_Yes | StreamingMovies_No | StreamingMovies_No internet service | StreamingMovies_Yes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4555 | 16 | 19.75 | 294.90 | 1370.923131 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 3379 | 72 | 64.70 | 4746.05 | 3683.643192 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 1713 | 67 | 109.70 | 7344.45 | 3018.965636 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 2399 | 47 | 99.70 | 4747.20 | 1370.923131 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 |
| 1096 | 46 | 40.40 | 1842.70 | 3683.643192 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
model = NGBClassifier(random_state=SEED)
model.fit(df_Xtrain,ser_ytrain,
X_val=df_Xvalid,Y_val=ser_yvalid,
early_stopping_rounds=50
)
[iter 0] loss=0.5786 val_loss=0.5734 scale=2.0000 norm=4.0000 [iter 100] loss=0.4137 val_loss=0.4357 scale=2.0000 norm=3.4542 [iter 200] loss=0.3960 val_loss=0.4255 scale=1.0000 norm=1.7693 [iter 300] loss=0.3899 val_loss=0.4235 scale=1.0000 norm=1.7927 [iter 400] loss=0.3867 val_loss=0.4230 scale=1.0000 norm=1.8030
NGBClassifier(random_state=RandomState(MT19937) at 0x7FE19C3577C0)
ypreds = model.predict(df_Xtest)
yprobs2d = model.predict_proba(df_Xtest)
model_eval_bin('ngboost',ytest,ypreds,yprobs2d,show_plots=False)
profit = get_profit(ytest,ypreds)
print(f'profit = ${profit:,d}')
precision recall f1-score support
0 0.79 0.94 0.86 1035
1 0.65 0.31 0.42 374
accuracy 0.77 1409
macro avg 0.72 0.62 0.64 1409
weighted avg 0.75 0.77 0.74 1409
[[973 62]
[259 115]]
| Accuracy | Precision | Recall | F1-score | AUC | |
|---|---|---|---|---|---|
| ngboost | 0.7722 | 0.6497 | 0.3075 | 0.4174 | 0.6238 |
profit = $-12,000
# ypreds[:5],yprobs2d[:5]
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING) # use INFO to see progress
from optuna.pruners import SuccessiveHalvingPruner
show_methods(optuna)
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | Any | delete_study | load_study | structs |
| 1 | Study | distributions | logging | study |
| 2 | TYPE_CHECKING | exceptions | multi_objective | trial |
| 3 | Trial | get_all_study_summaries | progress_bar | type_checking |
| 4 | TrialPruned | importance | pruners | types |
| 5 | create_study | importlib | samplers | version |
| 6 | create_trial | integration | storages | visualization |
| 7 | dashboard |
def objective(trial):
    """Optuna objective: maximize validation-set profit of an NGBClassifier.

    Suggests n_estimators, learning_rate, col_sample and the base learner's
    max_depth, fits with early stopping on the validation split, and returns
    the business profit (see ``get_profit``).

    Relies on module-level globals: SEED, df_Xtrain, ser_ytrain, df_Xvalid,
    ser_yvalid, NGBClassifier, get_profit, np.
    """
    # NGBoost fits its base learners to (natural) gradients, so the base is
    # a *regressor* even for classification; DecisionTreeClassifier with
    # criterion='friedman_mse' is rejected by sklearn.
    from sklearn.tree import DecisionTreeRegressor

    params_fixed = dict(random_state=SEED)

    # BUG FIX: suggesting estimator *objects* via suggest_categorical fails
    # with "Object of type DecisionTreeClassifier is not JSON serializable"
    # once the study uses persistent sqlite storage (see the trial-0 error).
    # Suggest the plain integer max_depth and build the base learner from it.
    max_depth = trial.suggest_categorical('max_depth', [2, 3, 4])
    base = DecisionTreeRegressor(criterion='friedman_mse', max_depth=max_depth)

    params_ngb_optuna = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 1.0),
        'col_sample': trial.suggest_uniform('col_sample', 0.6, 1),
        'Base': base,
    }
    model = NGBClassifier(**params_fixed, **params_ngb_optuna)

    # fit with early stopping on the held-out validation split
    model.fit(df_Xtrain, ser_ytrain,
              X_val=df_Xvalid, Y_val=ser_yvalid,
              early_stopping_rounds=100
              )
    ypreds = model.predict(df_Xvalid)
    ypreds = np.rint(ypreds)
    # score = skmetrics.roc_auc_score(ser_yvalid.to_numpy().ravel(),ypreds)
    score = get_profit(ser_yvalid.to_numpy().ravel(), ypreds)
    return score
# NOTE: optuna's hyperparameter search is inherently non-deterministic;
# running the study twice may not yield the same hyperparameters.
params_optuna_study = {
    'direction': 'maximize',
    'sampler': optuna.samplers.TPESampler(seed=SEED),
    'study_name': 'nbboost_optuna',
    'storage': 'sqlite:///ngboost_optuna_churn.db',
    'load_if_exists': True,   # resume if the sqlite study already exists
    'pruner': optuna.pruners.SuccessiveHalvingPruner(min_resource=100),
}

study = optuna.create_study(**params_optuna_study)

N_TRIALS = 1  # make it large
study.optimize(objective, n_trials=N_TRIALS, timeout=600)
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/distributions.py:406: UserWarning:
Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains DecisionTreeClassifier(criterion='friedman_mse', max_depth=2) which is of type DecisionTreeClassifier.
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/distributions.py:406: UserWarning:
Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains DecisionTreeClassifier(criterion='friedman_mse', max_depth=3) which is of type DecisionTreeClassifier.
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/distributions.py:406: UserWarning:
Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains DecisionTreeClassifier(criterion='friedman_mse', max_depth=4) which is of type DecisionTreeClassifier.
[W 2020-12-27 11:24:33,296] Trial 0 failed because of the following error: TypeError('Object of type DecisionTreeClassifier is not JSON serializable')
Traceback (most recent call last):
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/study.py", line 709, in _run_trial
result = func(trial)
File "<ipython-input-50-480b1f277421>", line 22, in objective
'Base': trial.suggest_categorical('Base', [b1,b2,b3]),
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/trial/_trial.py", line 489, in suggest_categorical
return self._suggest(name, CategoricalDistribution(choices=choices))
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/trial/_trial.py", line 683, in _suggest
storage.set_trial_param(trial_id, name, param_value_in_internal_repr, distribution)
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/storages/_cached_storage.py", line 237, in set_trial_param
study_id, trial_id, param_name, param_value_internal, distribution
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/storages/_rdb/storage.py", line 817, in _check_and_set_param_distribution
distribution_json=distributions.distribution_to_json(distribution),
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/distributions.py", line 476, in distribution_to_json
return json.dumps({"name": dist.__class__.__name__, "attributes": dist._asdict()})
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/json/__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type DecisionTreeClassifier is not JSON serializable
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-51-8d8fea4bb951> in <module> 15 16 N_TRIALS = 1 # make it large ---> 17 study.optimize(objective, n_trials=N_TRIALS,timeout=600) ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/study.py in optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar) 290 if n_jobs == 1: 291 self._optimize_sequential( --> 292 func, n_trials, timeout, catch, callbacks, gc_after_trial, None 293 ) 294 else: ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/study.py in _optimize_sequential(self, func, n_trials, timeout, catch, callbacks, gc_after_trial, time_start) 652 break 653 --> 654 self._run_trial_and_callbacks(func, catch, callbacks, gc_after_trial) 655 656 self._progress_bar.update((datetime.datetime.now() - time_start).total_seconds()) ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/study.py in _run_trial_and_callbacks(self, func, catch, callbacks, gc_after_trial) 683 # type: (...) -> None 684 --> 685 trial = self._run_trial(func, catch, gc_after_trial) 686 if callbacks is not None: 687 frozen_trial = copy.deepcopy(self._storage.get_trial(trial._trial_id)) ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/study.py in _run_trial(self, func, catch, gc_after_trial) 707 708 try: --> 709 result = func(trial) 710 except exceptions.TrialPruned as e: 711 message = "Trial {} pruned. {}".format(trial_number, str(e)) <ipython-input-50-480b1f277421> in objective(trial) 20 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01,1.0), 21 'col_sample': trial.suggest_uniform('col_sample', 0.6, 1), ---> 22 'Base': trial.suggest_categorical('Base', [b1,b2,b3]), 23 } 24 ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/trial/_trial.py in suggest_categorical(self, name, choices) 487 # CategoricalDistribution does not support dynamic value space. 
488 --> 489 return self._suggest(name, CategoricalDistribution(choices=choices)) 490 491 def report(self, value, step): ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/trial/_trial.py in _suggest(self, name, distribution) 681 682 param_value_in_internal_repr = distribution.to_internal_repr(param_value) --> 683 storage.set_trial_param(trial_id, name, param_value_in_internal_repr, distribution) 684 685 return param_value ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/storages/_cached_storage.py in set_trial_param(self, trial_id, param_name, param_value_internal, distribution) 235 # already. 236 self._backend._check_and_set_param_distribution( --> 237 study_id, trial_id, param_name, param_value_internal, distribution 238 ) 239 self._studies[study_id].param_distribution[param_name] = distribution ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/storages/_rdb/storage.py in _check_and_set_param_distribution(self, study_id, trial_id, param_name, param_value_internal, distribution) 815 param_name=param_name, 816 param_value=param_value_internal, --> 817 distribution_json=distributions.distribution_to_json(distribution), 818 ) 819 ) ~/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/optuna/distributions.py in distribution_to_json(dist) 474 """ 475 --> 476 return json.dumps({"name": dist.__class__.__name__, "attributes": dist._asdict()}) 477 478 ~/opt/miniconda3/envs/dataSc/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw) 229 cls is None and indent is None and separators is None and 230 default is None and not sort_keys and not kw): --> 231 return _default_encoder.encode(obj) 232 if cls is None: 233 cls = JSONEncoder ~/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py in encode(self, o) 197 # exceptions aren't as detailed. The list call should be roughly 198 # equivalent to the PySequence_Fast that ''.join() would do. 
--> 199 chunks = self.iterencode(o, _one_shot=True) 200 if not isinstance(chunks, (list, tuple)): 201 chunks = list(chunks) ~/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot) 255 self.key_separator, self.item_separator, self.sort_keys, 256 self.skipkeys, _one_shot) --> 257 return _iterencode(o, 0) 258 259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, ~/opt/miniconda3/envs/dataSc/lib/python3.7/json/encoder.py in default(self, o) 177 178 """ --> 179 raise TypeError(f'Object of type {o.__class__.__name__} ' 180 f'is not JSON serializable') 181 TypeError: Object of type DecisionTreeClassifier is not JSON serializable
%%time
# Resume from last time
N_TRIALS = 10 # make it large
study = optuna.create_study(**params_optuna_study)
study.optimize(objective, n_trials=N_TRIALS,timeout=600)
print(f'Number of finished trials: {len(study.trials)}')
# best trail
best_trial = study.best_trial
# best params
params_best = study.best_trial.params
params_best
# Example best params from a previous run (kept for reference):
# params_best = {'learning_rate': 0.03702333216928129,
#  'max_depth': 6,
#  'n_estimators': 376,
#  'reg_lambda': 0.6177177549044198,
#  'scale_pos_weight': 5}

# NOTE(review): params_best above comes from the ngboost optuna study;
# ngboost-specific keys such as 'Base' or 'col_sample' would be rejected
# by CatBoostClassifier — confirm which study these params come from.
model = CatBoostClassifier(**params_best, cat_features=cols_cat_idx,
                           verbose=False, random_state=SEED)
model.fit(df_Xtrain_full, ytrain_full)

ypreds = model.predict(df_Xtest)
yprobs2d = model.predict_proba(df_Xtest)

model_eval_bin('catboost+optuna', ytest, ypreds, yprobs2d, show_plots=False)

profit = get_profit(ytest, ypreds)
# '$' added for consistency with the earlier ngboost profit printout
print(f"profit = ${profit:,d}")

model_eval_bin('catboost+optuna', ytest, ypreds, yprobs2d, show_plots=True)
# Report total notebook wall-clock time as hours/minutes/seconds.
elapsed = time.time() - time_start_notebook
hours, rem = divmod(elapsed, 3600)
mins, secs = divmod(rem, 60)
print('Time taken to run whole notebook: {:.0f} hr '
      '{:.0f} min {:.0f} secs'.format(hours, mins, secs))